# -*- coding: utf-8 -*-
"""
Created on Mon Jul  8 09:57:20 2024
@author: xinyuan.wei
FIA Data Analysis
"""
import os
import pandas as pd
import numpy as np

# Stand-age window applied to the 'STDAGE' column further down
# (rows are kept when stage_min < STDAGE <= stage_max).
stage_min, stage_max = 100, 140

# Tag embedded in every output file name, e.g. '100-140'.
stage = f'{stage_min}-{stage_max}'

# Output file names for the diversity, evenness and dominant-species summaries.
biomass_dive, biomass_even, biomass_dosp = (
    f'biomass_{kind}_{stage}.csv' for kind in ('dive', 'even', 'dosp')
)


# The input table is expected one level above the current working directory.
current_directory = os.getcwd()
parent_directory = os.path.dirname(current_directory)
file_path = os.path.join(parent_directory, 'NE_Plot_Composition.csv')
data = pd.read_csv(file_path)

# Build the row selection as one boolean mask:
#   CONDID == 1, CONDPROP_UNADJ >= 0.9, eAG >= 1,
#   and STDAGE inside the window (stage_min, stage_max].
row_mask = (
    data['CONDID'].eq(1)
    & data['CONDPROP_UNADJ'].ge(0.9)
    & data['eAG'].ge(1)
    & data['STDAGE'].gt(stage_min)
    & data['STDAGE'].le(stage_max)
)

# Work on an independent copy so the bin columns added later do not
# write into (or warn about) a view of the full table.
filtered_data = data.loc[row_mask].copy()

##### Diversity analysis #####
# Summarise eAG over 10 equal-width classes of 'sindex'.

# Observed range of 'sindex' in the filtered rows
sindex_min = filtered_data['sindex'].min()
sindex_max = filtered_data['sindex'].max()

# 11 evenly spaced edges delimit 10 bins between the minimum and maximum values
bins = np.linspace(sindex_min, sindex_max, 11)

# With right=False every bin is half-open, [left, right). Left as-is, rows whose
# 'sindex' equals sindex_max sit exactly on the last edge, match no bin, become
# NaN and silently drop out of the groupby below. Move the last edge up by one
# representable float (the smallest possible widening) so the top bin keeps them.
bins[-1] = np.nextafter(bins[-1], np.inf)

# Bin the 'sindex' values based on the calculated edges
filtered_data['sindex_bin'] = pd.cut(filtered_data['sindex'], bins, right=False)

# Group by the sindex bins and calculate mean, standard deviation and row count of eAG.
# observed=False keeps bins that received no rows in the output table.
grouped_sindex = filtered_data.groupby('sindex_bin', observed=False)['eAG'].agg(['mean', 'std', 'count']).reset_index()
grouped_sindex.columns = ['sindex_bin', 'eAG_mean', 'eAG_std', 'count']

# Make sure the output folder exists; to_csv does not create missing directories.
results_directory = os.path.join(parent_directory, 'Composition_Results')
os.makedirs(results_directory, exist_ok=True)

save_file_path = os.path.join(results_directory, biomass_dive)
grouped_sindex.to_csv(save_file_path, index=False)


##### Evenness analysis #####
# Summarise eAG over 10 equal-width classes of 'evenness'.

# Observed range of 'evenness' in the filtered rows
evenness_min = filtered_data['evenness'].min()
evenness_max = filtered_data['evenness'].max()

# 11 evenly spaced edges delimit 10 bins between the minimum and maximum values
bins = np.linspace(evenness_min, evenness_max, 11)

# With right=False every bin is half-open, [left, right). Left as-is, rows whose
# 'evenness' equals evenness_max sit exactly on the last edge, match no bin,
# become NaN and silently drop out of the groupby below. Move the last edge up
# by one representable float (the smallest possible widening) so the top bin
# keeps them.
bins[-1] = np.nextafter(bins[-1], np.inf)

# Bin the 'evenness' values based on the calculated edges
filtered_data['evenness_bin'] = pd.cut(filtered_data['evenness'], bins, right=False)

# Group by the evenness bins and calculate mean, standard deviation and row count of eAG.
# observed=False keeps bins that received no rows in the output table.
grouped_evenness = filtered_data.groupby('evenness_bin', observed=False)['eAG'].agg(['mean', 'std', 'count']).reset_index()
grouped_evenness.columns = ['evenness_bin', 'eAG_mean', 'eAG_std', 'count']

# Make sure the output folder exists; to_csv does not create missing directories.
results_directory = os.path.join(parent_directory, 'Composition_Results')
os.makedirs(results_directory, exist_ok=True)

save_file_path = os.path.join(results_directory, biomass_even)
grouped_evenness.to_csv(save_file_path, index=False)

##### Dominant species analysis #####
# Group by the dominant tree species and calculate mean, standard deviation
# and row count of eAG for each species.
grouped_dosp = filtered_data.groupby('dominant_area_species', observed=False)['eAG'].agg(['mean', 'std', 'count']).reset_index()
grouped_dosp.columns = ['dominant_area_species', 'eAG_mean', 'eAG_std', 'count']

# Make sure the output folder exists; to_csv does not create missing directories.
results_directory = os.path.join(parent_directory, 'Composition_Results')
os.makedirs(results_directory, exist_ok=True)

# The full (unsorted) per-species table is what gets written to disk.
save_file_path = os.path.join(results_directory, biomass_dosp)
grouped_dosp.to_csv(save_file_path, index=False)

# Sort the grouped data by record count in descending order
sorted_grouped_dosp = grouped_dosp.sort_values(by='count', ascending=False).reset_index(drop=True)

# Select the 20 species with the most records and keep their identifiers as a list
top_20 = sorted_grouped_dosp.head(20)
top_20_species = top_20['dominant_area_species'].tolist()


